##Setup and Import Data
#Std Libraries
library(openair); #vis pkg
library(tidyverse); #helps vis pkg
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(worldmet); #met pkg
####Get Germany AQ Data
library(dplyr); #data manip fxns
library(saqgetr); #euro pkg
#view all sites and sort down to desired location
sites_dat <- get_saq_sites();
de_sites <- filter(sites_dat, country == 'germany');
berlin_sites <- filter(de_sites, latitude > 52.3 & latitude < 52.5 & longitude > 13.4 & longitude < 13.6)
#see this
head(de_sites)
## # A tibble: 6 x 16
## site site_name latitude longitude elevation country country_iso_code
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 debb001 Burg (Spreewald) 51.8 14.1 54 germany DE
## 2 debb003 Brandenburg a.d… 52.4 12.5 33 germany DE
## 3 debb006 Cottbus-Süd 51.7 14.3 76 germany DE
## 4 debb007 Elsterwerda 51.5 13.5 89 germany DE
## 5 debb008 Finsterwalde 51.6 13.7 110 germany DE
## 6 debb009 Forst 51.7 14.6 75 germany DE
## # … with 9 more variables: site_type <chr>, site_area <chr>, date_start <dttm>,
## # date_end <dttm>, network <chr>, eu_code <chr>, eoi_code <chr>,
## # observation_count <dbl>, data_source <chr>
#let's test with Blankenfelde-Mahlow
prac_dat <- get_saq_observations(
site = "debb086",
start = 2011,
end = 2015,
verbose = TRUE
)
## 2022-01-26 18:26:14.818 EST: Loading `air_quality_data_site_debb086_2011.csv.gz`...
## 2022-01-26 18:26:15.289 EST: Loading `air_quality_data_site_debb086_2012.csv.gz`...
## 2022-01-26 18:26:15.441 EST: Loading `air_quality_data_site_debb086_2013.csv.gz`...
## 2022-01-26 18:26:15.880 EST: Loading `air_quality_data_site_debb086_2014.csv.gz`...
## 2022-01-26 18:26:16.757 EST: Loading `air_quality_data_site_debb086_2015.csv.gz`...
####Get Germany MET Data
#first find a station near the lat/long of the aq site
#the next fxn produces a map, don't run if not needed
#getMeta(lat = 52.54, lon = 13.34, returnMap = TRUE)
#there is only one site for Berlin, so we'll take that
de_met <- importNOAA(code = "103850-99999", year = 2011:2015);
#you can then merge the data together by date as noted in documentation
#OPTION 1: subset and pivot
#cut irrelevant rows
new_prac_dat <- subset(prac_dat, select = -c(date_end, site, validity, summary, unit));
#first mutate the extracted pollutant data
prac_dat2 <- pivot_wider(
new_prac_dat,
id_cols = c(date, process),
names_from = variable,
values_from = value,
values_fill = NA
)
#not quite perfect but very close. What are the different processes and can they be averaged?
#OPTION 2: saq_clean
prac_dat3 <- prac_dat %>%
saq_clean_observations(summary = "hour", valid_only = TRUE, spread = TRUE);
#that works well but the processes disappeared
####Append and Confirm
#now append the data
aq_berlin1 <- left_join(
de_met,
prac_dat3,
by = "date"
)
#compare to basic data from text example as this has the ideal formatting
base_dat <- importAURN(site = "my1", year = 2000:2005);
head(base_dat)
## # A tibble: 6 x 13
## site code date co nox no2 no o3 so2 pm10
## <chr> <fct> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 London Ma… MY1 2000-01-01 00:00:00 2.6 388 78 203 2 16 55
## 2 London Ma… MY1 2000-01-01 01:00:00 8.4 886 84 525 4 35 96
## 3 London Ma… MY1 2000-01-01 02:00:00 7.4 816 117 458 4 32 91
## 4 London Ma… MY1 2000-01-01 03:00:00 4.5 636 97 353 0 24 73
## 5 London Ma… MY1 2000-01-01 04:00:00 3.4 483 74 268 0 19 55
## 6 London Ma… MY1 2000-01-01 05:00:00 1.9 231 65 109 0 11 31
## # … with 3 more variables: pm2.5 <dbl>, ws <dbl>, wd <dbl>
head(aq_berlin1)
## # A tibble: 6 x 31
## code station date latitude longitude elev ws wd
## <fct> <fct> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 103850-99999 SCHONEF… 2011-01-01 00:00:00 52.4 13.5 47.8 7.7 255.
## 2 103850-99999 SCHONEF… 2011-01-01 01:00:00 52.4 13.5 47.8 9.55 260
## 3 103850-99999 SCHONEF… 2011-01-01 02:00:00 52.4 13.5 47.8 9 260
## 4 103850-99999 SCHONEF… 2011-01-01 03:00:00 52.4 13.5 47.8 9.25 254.
## 5 103850-99999 SCHONEF… 2011-01-01 04:00:00 52.4 13.5 47.8 10.3 250
## 6 103850-99999 SCHONEF… 2011-01-01 05:00:00 52.4 13.5 47.8 10.3 240
## # … with 23 more variables: air_temp <dbl>, atmos_pres <dbl>, visibility <dbl>,
## # dew_point <dbl>, RH <dbl>, ceil_hgt <dbl>, cl_1 <dbl>, cl_2 <dbl>,
## # cl_3 <dbl>, cl <dbl>, cl_1_height <dbl>, cl_2_height <dbl>,
## # cl_3_height <dbl>, pwc <chr>, date_end <dttm>, site <chr>, co <dbl>,
## # no <dbl>, no2 <dbl>, nox <dbl>, o3 <dbl>, pm10 <dbl>, pm2.5 <dbl>
###Wind Rose
#plot a windrose for everything
windRose(aq_berlin1)
#plot by year
windRose(aq_berlin1, type = "year", layout = c(3, 2))
#plot by pm10 (the type function is useful!)
windRose(aq_berlin1, type = "pm10", layout = c(3, 2))
## Warning: removing 17903 missing rows due to pm10
###Pollution Rose
#basic example for nox
pollutionRose(aq_berlin1, pollutant = "nox")
#link with s02 using type
pollutionRose(aq_berlin1, pollutant = "nox", type = "no2", layout = c(4, 1))
## Warning: removing 17587 missing rows due to no2
#segment and normalize
pollutionRose(aq_berlin1, pollutant = "nox", seg = 1, normalise = TRUE)
#seg = 1 removes the spaces between the bars
#normalize makes the length of all bars the same to see pollutant proportions more clearly
###Polar Frequencies
#Basic example, a more detailed wind description
polarFreq(aq_berlin1)
#bin by year
polarFreq(aq_berlin1, type = "year")
#add pollutant
polarFreq(aq_berlin1, type = "year", pollutant = "pm10", statistic = "mean", min.bin = 2)
# we can see in detail how high concentrations came from SE in 2000, but moved north and decreased in frequency starting in 2003
###Percentile Rose
#basic example, the concentrations of the pollutant are marked on the rings and the percentiles are shaded
percentileRose(aq_berlin1, pollutant = "pm10")
#can also customize the percentiles and smooth the plot
percentileRose(aq_berlin1, pollutant = "pm10", percentile = c(25, 50, 75, 90, 95, 99), col = "jet", smooth = TRUE)
#we see the pm10 is pretty even from all directions
###Polar Plot
#basic polar plot
polarPlot(aq_berlin1, pollutant = "pm10")
# so we see that there is only a high conc from the sw when the wind speed is higher than usual
###Polar Annulus
#make polar annuli for all data, by season, by weekday, and by hour in a day
#polarAnnulus(aq_berlin1, poll = "pm10", period = "trend", main = "Trend")
polarAnnulus(aq_berlin1, poll = "pm10", period = "season", main = "Season")
polarAnnulus(aq_berlin1, poll = "pm10", period = "weekday", main = "Weekday")
polarAnnulus(aq_berlin1, poll = "pm10", period = "hour", main = "Hour")
# shows direction of pollution over time from inner most to outermost ring
###Time Series Plots
#unfinished
###Temporal Variations
#unfinished